###############################################################################
# Replikationsdatensatz: Die Sonntagsfrage, Soziale Erw�nschtheit und die AfD #
#                                                                             #
# von: Roni Lehrer                                                            #
# lehrer@uni-mannheim.de                                                      #
# zuletzt ge�ndert am: 18. Juli 2018                                          #
# Getest mit R version 3.5.0                                                  #
###############################################################################

rm(list=ls())

library(data.table)  #getestet mit Version 1.11.4
library(ggplot2)     #getestet mit Version 2.2.1
library(readstata13) #getestet mit Version 0.9.2
library(boot)        #getestet mit Version 1.3-20
library(MASS)        #getestet mit Version 7.3-50
library(grid)        #getestet mit Version 0.7-4

### Die folgende Funktion wird sp�ter zur Kombination der verschiedenen Graphen ben�tigt.
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots = length(plots)
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }
 if (numPlots==1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
### Ende der Funktion

#################################################################################################
# Der Hauptatensatz kann im GESIS-Datenarchiv heruntergeladen werden:                           #
#                                                                                               #
# Blom, Annelies G.; Felderer, Barbara; Herzing, Jessica; Krieger, Ulrich; Rettig, Tobias;      #
# SFB 884 �Political Economy of Reforms�, Universit�t Mannheim (2018): German Internet Panel,   #
# Welle 30 (Juli 2017). GESIS Datenarchiv, K�ln. ZA6904 Datenfile Version 1.0.0,                #
# doi: 10.4232/1.12977                                                                          #
#                                                                                               #
# Internet: http://dx.doi.org/10.4232/1.12977                                                   #
# Studiennummer: ZA6904                                                                         #
#################################################################################################


#Einlesen der Daten
gip <- as.data.table(read.dta13("ZA6904_v1-0-0.dta", convert.factors=F))

#Reduktion des Datensatzen auf relevante Variablen
data <- data.table(gip$id_g, gip$expCL30001, gip$AA30039, gip$CL30001, gip$CL30002
                   ,gip$CL30003, gip$CL30004, gip$CL30005, gip$CL30012,
                   gip$gender_16, gip$year_of_birth_cat_16, gip$educ_school_16,
                   gip$online_status, gip$state_16, gip$dDatum, gip$educ_job_16,
                   gip$AA30041a9, gip$AA30040a)
colnames(data) <- c("id","group","DQ","RRT","ICT1ctrl","ICT1trt","ICT2trt","ICT2ctrl","crowd",
                    "gender", "year_of_birth_cat_16", "educ_school_16", "online_status", "state_16",
                   "dDatum", "educ_job_16",
                    "plcAfD", "plcSelf")

data$east <- ifelse(data$state_16>10,1,0)         #Dummy f�r Ostdeutschland
data$internet <- ifelse(data$online_status>0,1,0) #Dummy f�r Internetzugang
data$afd <- ifelse(data$DQ==11,1,0)               #Dummy f�r AfD vote intention
data$online_status <- NULL                        #L�schen der Variablen
data$state_16 <- NULL                             #L�schen der Variablen



#Behalte nur F�lle, die alle relevanten Fragen gesehen (nicht beantwortet!) haben
data <- data[is.na(DQ)==FALSE]      #Sonntagsfrage beantwortet
data <- data[is.na(crowd)==FALSE]   #WdV-Frage beantwortet

##############################################
# Tabelle 2: Fragetyp und fehlende Antworten #
##############################################
befragte <- c(nrow(data),
              nrow(data),
              nrow(data[group==1]),
              nrow(data[group==2|group==3]),
              nrow(data[group==2]),
              nrow(data[group==3])
              )
missings <- c(nrow(data[DQ<0]),
              nrow(data[crowd<0]),
              nrow(data[group==1&RRT<0]),
              nrow(data[(group==2&(ICT1ctrl<0|ICT1trt<0))|(group==3&(ICT2ctrl<0|ICT2trt<0))]),
              nrow(data[group==2&(ICT1ctrl<0|ICT1trt<0)]),
              nrow(data[group==3&(ICT2ctrl<0|ICT2trt<0)])
              )
tab2 <- cbind(c("Sonntagsfrage", "WdV", "RAT", "Liste", "A", "B"),
              befragte,
              round(befragte/max(befragte)*100, digit=1),
              missings,
              round((befragte-missings)/befragte*100, digit=1)
              )
tab2


### Matrix f�r die Ergebnisse
result <- matrix(NA,ncol=3,nrow=4)
rownames(result) <- c("DQ","WdV","RAT","D-List")
colnames(result) <- c("lowerCI","Estimate","upperCI")


### 1. Sonntagsfrage
data <- as.data.frame(data)
boots <- c()
set.seed(834950)
for (i in 1:1000) {
    dat <- sample(1:nrow(data), 865, TRUE)
    boots <- c(boots, table(data$DQ[dat]==11)[2]/table(data$DQ[dat]%in%c(-99, -98, 1, 2))[1])
}
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
result[1,] <- cbind(low*100,est*100,up*100)

### 2. Weisheit der Vielen
boots <- c()
set.seed(133707)
for (i in 1:1000) {
    dat <- sample(1:nrow(data), 865, TRUE)
    dat <- data$crowd[dat]
    dat <- ifelse(dat==-90, NA, dat)
    boots <- c(boots, mean(dat, na.rm=TRUE))
}
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
result[2,] <- cbind(low,est,up)

### 3. Kreuzweise RAT
boots <- c()
set.seed(133707)
for (i in 1:1000) {
    dat <- sample(as.numeric(rownames(data[data$group==1,])), 865, TRUE)
    dat <- data[dat,"RRT"]
    dat <- dat[is.na(dat)==FALSE&dat>0]
    n <- length(dat) # number of responses
    r <- length(dat[dat==1]) # Number of "Yes" answers
    boots <- c(boots, ((r/n)+.7-1)/(2*.7-1))
}
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
result[3,] <- cbind(low*100,est*100,up*100)

### 3. Listenexperiment
A <- cbind(data$ICT1ctrl,data$ICT1trt)
B <- cbind(data$ICT2ctrl,data$ICT2trt)
for(i in 1:2){
  A[A[,i]<0,i] <- NA
  B[B[,i]<0,i] <- NA
}
A <- A[complete.cases(A),]
B <- B[complete.cases(B),]

est <- ((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2

double.list <- function(dat, indices) {
    A <- dat[indices,1:2]
    B <- dat[indices,3:4]
    return(((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2)
}
booted <- boot(cbind(A[1:nrow(B),],B), double.list, R=1000)
low <- quantile(booted$t, probs=.025)
up <- quantile(booted$t, probs=.975)
result[4,] <- cbind(low*100,est*100,up*100)

###############
# Abbildung 1 #
###############
result <- cbind(result, rownames(result))
colnames(result)[4] <- "type"
result <- as.data.table(result)
result <- result[type!="ICT1"&type!="ICT2"]
result$Estimate <- as.numeric(result$Estimate)
result$lowerCI <- as.numeric(result$lowerCI)
result$upperCI <- as.numeric(result$upperCI)
result$y <- ifelse(result$type=="DQ", 4,
                   ifelse(result$type=="D-List", 3,
                          ifelse(result$type=="RAT", 2, 1)))
result$lab <- factor(result$y, labels=c("Weisheit\nder Vielen", "Kreuzweise\nRAT",
                                   "Doppeltes\nListenexperiment", "Sonntagsfrage"))
p <- ggplot(data=result, aes(x=Estimate, group=type, y=lab))
p <- p + geom_vline(aes(xintercept=12.6), linetype=2)
p <- p + geom_segment(aes(yend=lab, x=lowerCI, xend=upperCI))
p <- p + geom_point(size=.6)
p <- p + theme_bw()
p <- p + theme(legend.title=element_blank())
p <- p + theme(panel.grid.major.x = element_blank())
p <- p + theme(panel.grid.major.y = element_blank())
p <- p + theme(panel.grid.minor.x = element_blank())
p <- p + theme(panel.grid.minor.y = element_blank())
p <- p + geom_text(aes(y=y+.08, label=round(Estimate, 1)), size=1.8)
p <- p + xlab("Anteil der AfD-Zweitstimmen (in %)")
p <- p + theme(text = element_text(size=7))
p <- p + ylab("")
p
ggsave("Abb1.tiff", dpi=500, width=8, height=8, units="cm")

#########################################
# Beantwortungszeitanalyse (Perzentile) #
#########################################

##################################################################################################
# Die Paradaten der Welle 30 des GIP sind nur auf Anfrage erh�ltlich.                            #
# Anfragen k�nnen direkt an das GIP-Team am SFB 884 an der Universit�t Mannheim gestellt werden. #
#                                                                                                #
# Email: gip@uni-mannheim.de                                                                     #
# Internet: http://reforms.uni-mannheim.de/internet_panel/Internet_Panel/                        #
##################################################################################################

paradata <- read.dta13("GIP_paradata_W30_V1.dta", convert.factors=F, convert.dates=F)
para <- data.frame(paradata$id_g, paradata$dauer_f016, paradata$dauer_f050, paradata$dauer_f051
                    ,paradata$dauer_f052, paradata$dauer_f053, paradata$dauer_f054, paradata$dauer_f055)
colnames(para) <- c("id","dDQ","dRRT","dICT1ctrl","dICT1trt","dICT2trt","dICT2ctrl","dcrowd")
para <- as.data.table(para)
para$dDQ <- para$dDQ/1000
para$dRRT <- para$dRRT/1000
para$dICT1ctrl <- para$dICT1ctrl/1000
para$dICT1trt <- para$dICT1trt/1000
para$dICT2ctrl <- para$dICT2ctrl/1000
para$dICT2trt <- para$dICT2trt/1000
para$dcrowd <- para$dcrowd/1000

data <- merge(data, para, by="id")


### Matrix f�r die Ergebnisse
qresult <- array(NA, dim=c(4,3,95),
                 dimnames=list(c("DQ","WoC","RRT", "D-ICT"),
                               c("lowerCI","Estimate","upperCI"),
                               c()))

#get percentiles of answering time (for lists: number of elements are considered too)
q.DQ <- quantile(data$dDQ, probs=(1:100)/100, na.rm=TRUE)
q.crowd <- quantile(data$dcrowd, probs=(1:100)/100, na.rm=TRUE)
q.RRT <- quantile(data$dRRT, probs=(1:100)/100, na.rm=TRUE)
q.ICTtrt <- quantile(c(data$dICT1trt, data$dICT2trt), probs=(1:100)/100, na.rm=TRUE)
q.ICTctrl <- quantile(c(data$dICT1ctrl, data$dICT2ctrl), probs=(1:100)/100, na.rm=TRUE)

data <- as.data.frame(data)
for (t in 1:95) {
    print(t)
### 1. Sonntagsfrage
 boots <- c()
 set.seed(834950) #von random.com
 for (i in 1:1000) {
     dat <- sample(as.numeric(rownames(data[data$dDQ>=q.DQ[t],])), 865, TRUE)
     boots <- c(boots, table(data$DQ[dat]==11)[2]/table(data$DQ[dat]%in%c(-99, -98, 1, 2))[1])
 }
 est <- mean(boots)
 low <- quantile(boots, .025)
 up <- quantile(boots, .975)
 qresult[1,,t] <- cbind(low*100,est*100,up*100)

### 2. Weisheit der Vielen
 boots <- c()
 set.seed(133707) #von random.com
 for (i in 1:1000) {
     dat <- sample(as.numeric(rownames(data[data$dcrowd>=q.crowd[t],])), 865, TRUE)
     dat <- data$crowd[dat]
     dat <- ifelse(dat==-90, NA, dat)
     boots <- c(boots, mean(dat, na.rm=TRUE))
 }
 est <- mean(boots)
 low <- quantile(boots, .025)
 up <- quantile(boots, .975)
 qresult[2,,t] <- cbind(low,est,up)

### 3. Kreuzweise RAT
 boots <- c()
 set.seed(133707)
 for (i in 1:1000) {
     dat <- sample(as.numeric(rownames(data[data$group==1&data$dRRT>=q.RRT[t],])), 865, TRUE)
     dat <- data[dat,"RRT"]
     dat <- dat[is.na(dat)==FALSE&dat>0]
     n <- length(dat) # number of responses
     r <- length(dat[dat==1]) # Number of "Yes" answers
     boots <- c(boots, ((r/n)+.7-1)/(2*.7-1))
 }
 est <- mean(boots)
 low <- quantile(boots, .025)
 up <- quantile(boots, .975)
 qresult[3,,t] <- cbind(low*100,est*100,up*100)

### 3. Listenexperiment
  A <- cbind(data$ICT1ctrl[data$dICT1ctrl>=q.ICTctrl[t]&data$dICT1trt>=q.ICTtrt[t]],
             data$ICT1trt[data$dICT1ctrl>=q.ICTctrl[t]&data$dICT1trt>=q.ICTtrt[t]])
  B <- cbind(data$ICT2ctrl[data$dICT2ctrl>=q.ICTctrl[t]&data$dICT2trt>=q.ICTtrt[t]],
             data$ICT2trt[data$dICT2ctrl>=q.ICTctrl[t]&data$dICT2trt>=q.ICTtrt[t]])
 for(i in 1:2){
   A[A[,i]<0,i] <- NA
   B[B[,i]<0,i] <- NA
 }
 A <- A[complete.cases(A),]
 B <- B[complete.cases(B),]

 est <- ((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2

 double.list <- function(dat, indices) {
     A <- dat[indices,1:2]
     B <- dat[indices,3:4]
     return(((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2)
 }
 mini <- min(nrow(A), nrow(B))
 booted <- boot(cbind(A[1:mini,],B[1:mini,]), double.list, R=1000)
 low <- quantile(booted$t, probs=.025)
 up <- quantile(booted$t, probs=.975)
 # results
 cbind(low,est,up)
 qresult[4,,t] <- cbind(low*100,est*100,up*100)
}

########
# Plot #
########

qdata <- matrix(nrow=0, ncol=4)
for (i in 1:dim(qresult)[3]) {
    qdata <- rbind(qdata, cbind(qresult[,,i], i))
}
qdata <- cbind(qdata, rownames(qdata))
colnames(qdata) <- c("lowerCI", "Estimate", "upperCI", "t", "type")
qdata <- as.data.table(qdata)
qdata$Estimate <- as.numeric(qdata$Estimate)
qdata$t <- as.numeric(qdata$t)
qdata$lowerCI <- as.numeric(qdata$lowerCI)
qdata$upperCI <- as.numeric(qdata$upperCI)
qdata$y <- ifelse(qdata$type=="DQ", 4,
                   ifelse(qdata$type=="D-ICT", 3,
                          ifelse(qdata$type=="RRT", 2, 1)))
qdata$lab <- factor(qdata$y, labels=c("Weisheit\nder Vielen", "Kreuzweise\nRAT",
                                   "Doppeltes\nListenexperiment", "Sonntagsfrage"))


p <- ggplot(data=qdata[type!="ICT1"&type!="ICT2"&t<=95], aes(x=t, group=type))
p <- p + geom_line(aes(y=Estimate, linetype=as.factor(lab)))
## p <- p + geom_ribbon(aes(ymin=lowerCI,
##                                 ymax=upperCI, fill=as.factor(type)))
p <- p + theme_bw()
p <- p + theme(panel.grid.major.x = element_blank())
p <- p + theme(panel.grid.major.y = element_blank())
p <- p + theme(panel.grid.minor.x = element_blank())
p <- p + theme(panel.grid.minor.y = element_blank())
p <- p + geom_hline(aes(yintercept=12.6), linetype=2)
p <- p + xlab("Perzentile der Beantwortungszeit")
p <- p + ylab("Anteil der AfD-Zweitstimmen (in %)")
p <- p + theme(text = element_text(size=5))
p <- p + theme(legend.key.height=unit(.8,"line"))
p <- p + theme(legend.position=c(.17,.17))
p <- p + theme(legend.title=element_blank())
p <- p + theme(legend.box.background = element_rect(colour = "black"))
p <- p + expand_limits(y=c(29))
p
ggsave("Abb2.tiff", dpi=500, width=8, height=8, units="cm")

##################
# Nur AfD-W�hler #
##################
data <- as.data.table(data)
afd <- data[DQ==11]

### Combined Results
result <- matrix(NA,ncol=3,nrow=3)
rownames(result) <- c("WoC","RRT","D-ICT")
colnames(result) <- c("lowerCI","Estimate","upperCI")


afd <- as.data.frame(afd)
### 2. Wisdom of Crowds: RESAMPLING
boots <- c()
set.seed(133707)
for (i in 1:1000) {
    dat <- sample(1:nrow(afd), 178, TRUE)
    dat <- afd$crowd[dat]
    dat <- ifelse(dat==-90, NA, dat)
    boots <- c(boots, mean(dat, na.rm=TRUE))
}
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
result[1,] <- cbind(low,est,up)


### 3. Crosswise RRT: RESAMPLING
boots <- c()
set.seed(133707)
for (i in 1:1000) {
    dat <- sample(as.numeric(rownames(afd[afd$group==1,])), 178, TRUE)
    dat <- afd[dat,"RRT"]
    dat <- dat[is.na(dat)==FALSE&dat>0]
    n <- length(dat) # number of responses
    r <- length(dat[dat==1]) # Number of "Yes" answers
    boots <- c(boots, ((r/n)+.7-1)/(2*.7-1))
}
est <- mean(boots)
low <- quantile(boots, .025)
up <- quantile(boots, .975)
result[2,] <- cbind(low*100,est*100,up*100)

#my double ict
A <- cbind(afd$ICT1ctrl,afd$ICT1trt)
B <- cbind(afd$ICT2ctrl,afd$ICT2trt)
for(i in 1:2){
  A[A[,i]<0,i] <- NA
  B[B[,i]<0,i] <- NA
}
A <- A[complete.cases(A),]
B <- B[complete.cases(B),]
est <- ((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2
double.list <- function(dat, indices) {
    A <- dat[indices,1:2]
    B <- dat[indices,3:4]
    return(((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2)
}
booted <- boot(cbind(A,B[1:nrow(A),]), double.list, R=1000)
low <- quantile(booted$t, probs=.025)
up <- quantile(booted$t, probs=.975)
# results
cbind(low,est,up)
result[3,] <- cbind(low*100,est*100,up*100)

###############
# Abbildung 3 #
###############
result <- cbind(result, rownames(result))
colnames(result)[4] <- "type"
result <- as.data.table(result)
result <- result[type!="ICT1"&type!="ICT2"]
result$Estimate <- as.numeric(result$Estimate)
result$lowerCI <- as.numeric(result$lowerCI)
result$upperCI <- as.numeric(result$upperCI)
result$y <- ifelse(result$type=="DQ", 4,
                   ifelse(result$type=="D-ICT", 3,
                          ifelse(result$type=="RRT", 2, 1)))
result$lab <- factor(result$y, labels=c("Weisheit\nder Vielen", "Kreuzweise\nRAT",
                                   "Doppeltes\nListenexperiment"))


p <- ggplot(data=result[type!="WoC"], aes(x=Estimate, group=type, y=lab))
p <- p + geom_vline(aes(xintercept=100), linetype=2)
p <- p + geom_segment(aes(yend=lab, x=lowerCI, xend=upperCI))
p <- p + geom_point(size=1)
p <- p + theme_bw()
p <- p + theme(legend.title=element_blank())
p <- p + theme(panel.grid.major.x = element_blank())
p <- p + theme(panel.grid.major.y = element_blank())
p <- p + theme(panel.grid.minor.x = element_blank())
p <- p + theme(panel.grid.minor.y = element_blank())
p <- p + geom_text(aes(y=y-.95, label=round(Estimate, 1)), size=2)
p <- p + theme(text = element_text(size=7))
p <- p + xlab("Erwarteter Anteil der AfD-Zweitstimmen\nunter AfD-W�hlern (in %)")
p <- p + ylab("")
p
ggsave("Abb3.tiff", dpi=500, width=8, height=8, units="cm")


#############################
# Tabelle 3 und Abbildung 4 #
#############################

notAfd <- data[afd==0&(group==2|group==3)]
notAfd$plcSelf <- ifelse(notAfd$plcSelf<0, NA, notAfd$plcSelf)
notAfd$plcAfD <- ifelse(notAfd$plcAfD<0, NA, notAfd$plcAfD)
notAfd$dist <- abs(notAfd$plcAfD-notAfd$plcSelf)
notAfd <- notAfd[group==2&is.na(ICT1trt)==FALSE&is.na(ICT1ctrl)==FALSE,
                 more2:=ifelse(ICT1trt>ICT1ctrl, 1, 0)]
notAfd <- notAfd[group==3&is.na(ICT2trt)==FALSE&is.na(ICT2ctrl)==FALSE,
                 more3:=ifelse(ICT2trt>ICT2ctrl, 1, 0)]
notAfd$more <- ifelse(notAfd$group==2, notAfd$more2, notAfd$more3)
notAfd$more2 <- NULL
notAfd$more3 <- NULL
notAfd <- notAfd[is.na(more)==FALSE&is.na(dist)==FALSE]
logit <- glm(more~dist, data=notAfd, family=binomial(link='logit'))

###########
# Table 3 #
###########
summary(logit)


betas <- mvrnorm(1000, coef(logit), vcov(logit))
counter <- cbind(1, 0:10)
linpred <- betas%*%t(counter)
logit2prob <- function(logit){
  odds <- exp(logit)
  prob <- odds / (1 + odds)
  return(prob)
}
probs <- logit2prob(linpred)*100
lower <- apply(probs, 2, quantile, .025)
upper <- apply(probs, 2, quantile, .975)
pdata <- as.data.table(data.frame(case=0:10,
                                  lower=lower,
                                  upper=upper,
                                  mean=lower+(.5*(upper-lower))
                                  ))

freq <- as.data.table(table(notAfd$dist))
freq$V1 <- as.numeric(freq$V1)

###############
# Abbildung 4 #
###############
p <- ggplot(data=pdata, aes(x=case))
p <- p + geom_line(aes(y=mean))
p <- p + geom_line(aes(y=upper), linetype=2)
p <- p + geom_line(aes(y=lower), linetype=2)
## p <- p + geom_ribbon(aes(ymin=lowerCI,
##                                 ymax=upperCI, fill=as.factor(type)))
p <- p + theme_bw()
p <- p + theme(legend.title=element_blank())
p <- p + theme(panel.grid.major.x = element_blank())
p <- p + theme(panel.grid.major.y = element_blank())
p <- p + theme(panel.grid.minor.x = element_blank())
p <- p + theme(panel.grid.minor.y = element_blank())
p <- p + xlab("Wahrgenommene Distanz zur AfD")
p <- p + ylab("Erwartete Wahrscheinlichkeit einer AfD-Wahl (in %)")
p <- p + scale_x_continuous(breaks =0:10)
p <- p + scale_y_continuous(expand = c(0, 0))
p <- p + expand_limits(y = 51)
p <- p + geom_segment(data=freq, aes(x=V1, xend=V1, y=19, yend=(N/80)+19))
p <- p + theme(plot.margin = unit(c(1,1,0,1), "cm")) # ("left", "right", "bottom", "top")
p <- p + theme(text = element_text(size=7))
p
ggsave("Abb4.tiff", dpi=500, width=8, height=8, units="cm")


####################################
# balance test for question types  #
####################################
data <- as.data.table(data)
data$gruppe <- ifelse(data$group==1, "Kreuzweise\nRAT", ifelse(data$group==2, "Listenexperiment\nGruppe A",
                                                                "Listenexperiment\nGruppe B"))
p1 <- ggplot(data[is.na(gruppe)==FALSE],aes(x=gender,gruppe=gruppe, fill=as.factor(gruppe)))+
    geom_histogram(position="dodge",binwidth=0.25)+theme_bw()+ylab("Anzahl Befragte")+
    scale_x_continuous(breaks=c(1,2), labels=c("m�nnlich", "weiblich"))+ggtitle("Geschlecht")+
    theme(legend.position="none")+xlab("")+theme(plot.title = element_text(hjust = 0.5))+ scale_fill_grey()
p1


p2 <- ggplot(data[is.na(gruppe)==FALSE],aes(x=afd,gruppe=gruppe, fill=as.factor(gruppe)))+
    geom_histogram(position="dodge",binwidth=0.25)+theme_bw()+ylab("")+
    scale_x_continuous(breaks=c(0,1), labels=c("andere Angabe", "AfD-W�hler"))+
    ggtitle("AfD-W�hler laut Sonntagsfrage")+
    theme(legend.position="none")+xlab("")+theme(plot.title = element_text(hjust = 0.5))+ scale_fill_grey()
p2

p3 <- ggplot(data[is.na(gruppe)==FALSE&year_of_birth_cat_16>0],aes(x=year_of_birth_cat_16,
                           gruppe=gruppe,
                           fill=as.factor(gruppe)))+
    geom_histogram(position="dodge",binwidth=0.25)+theme_bw()+ylab("")+
    scale_x_continuous(breaks=1:13,
                       labels=c("1935-1939", "1940-1944", "1945-1949", "1950-1954",
                           "1955-1959", "1960-1964", "1965-1969", "1970-1974",
                           "1975-1979", "1980-1984", "1985-1989", "1990-1994", "1995-1999"))+
    ggtitle("Geburtskohorte")+theme(axis.text.x = element_text(angle = 45, hjust = 1))+
    theme(legend.position="none")+xlab("")+theme(plot.title = element_text(hjust = 0.5))+ scale_fill_grey()
p3

p4 <- ggplot(data[is.na(gruppe)==FALSE&educ_school_16>0],aes(x=educ_school_16,
                           gruppe=gruppe,
                           fill=as.factor(gruppe)))+
    geom_histogram(position="dodge",binwidth=0.25)+theme_bw()+ylab("Anzahl Befragte")+
    scale_x_continuous(breaks=1:7,
                       labels=c("Sch�ler", "kein Schulabschluss", "Hauptschulabschluss",
                           "Mittlere Reife", "Fachholschulreife",
                           "Abitur", "Anderer Schulabschluss"))+
    ggtitle("H�chster Schulabschluss")+theme(axis.text.x = element_text(angle = 45, hjust = 1))+
    xlab("")+theme(plot.title = element_text(hjust = 0.5))+scale_fill_grey(name="")+
    theme(legend.key.height=unit(3,"line"))
p4


p5 <- ggplot(data[is.na(gruppe)==FALSE],aes(x=internet,
                           gruppe=gruppe,
                           fill=as.factor(gruppe)))+
    geom_histogram(position="dodge",binwidth=0.25)+theme_bw()+ylab("Anzahl Befragte")+
    scale_x_continuous(breaks=0:1,
                       labels=c("Internet auch ohne GIP", "Internet durch GIP bereitgestellt"))+
    ggtitle("Internetzugang durch GIP bereitgestellt")+xlab("")+theme(legend.position="none")+
    theme(plot.title = element_text(hjust = 0.5))+ scale_fill_grey()
p5

p6 <- ggplot(data[is.na(gruppe)==FALSE],aes(x=dDatum,
                           gruppe=gruppe,
                           fill=as.factor(gruppe)))+
    geom_histogram(position="dodge",binwidth=0.25)+theme_bw()+ylab("Anzahl Befragte")+
    ggtitle("Datum der Befragung")+
    theme(legend.position="none")+xlab("")+theme(plot.title = element_text(hjust = 0.5))+ scale_fill_grey()
p6

p7 <- ggplot(data[is.na(gruppe)==FALSE],aes(x=east,
                           gruppe=gruppe,
                           fill=as.factor(gruppe)))+
    geom_histogram(position="dodge",binwidth=0.25)+theme_bw()+ylab("")+
    scale_x_continuous(breaks=0:1,
                       labels=c("Alte Bundesl�nder", "Neue Bundesl�nder"))+
    ggtitle("Wohnort")+xlab("")+
    theme(plot.title = element_text(hjust = 0.5))+theme(legend.position="none")+ scale_fill_grey()
p7

tiff("Abb5.tiff", width=590, height=590)
 p <- multiplot(p1, p3, p6, p2, p5, p7, p4,  layout=rbind(c(1,2),
                                                          c(3,4),
                                                          c(5,6),
                                                          c(7,7),
                                                          c(7,7)))
dev.off()
#F�r abb5.eps: Manuell als Postscript speichern.


###############
# Hausnummern #
###############
num <- as.data.table(read.csv("hausnummern.csv", sep=";"))
num$end <- cumsum(num$freq)
num$start <- num$end-num$freq
num$label.x <- num$start+((num$end-num$start)/2)

p <- ggplot(data=num)
p <- p + geom_rect(aes(xmin=start, xmax=end, ymin=0, ymax=250, fill=as.factor(number)),
                   show.legend=FALSE)
p <- p + scale_fill_grey()
p <- p + scale_x_continuous(breaks=max(num$end)*0:10/10, label=as.character(0:10*10))
p <- p + theme_bw()
p <- p + theme(panel.grid.major.x = element_blank())
p <- p + theme(panel.grid.major.y = element_blank())
p <- p + theme(panel.grid.minor.x = element_blank())
p <- p + theme(panel.grid.minor.y = element_blank())
p <- p + theme(axis.text.y=element_blank())
p <- p + theme(axis.ticks.y=element_blank())
p <- p + theme(panel.border = element_blank())
p <- p + coord_fixed()
p <- p + geom_text(aes(x=label.x, y=350, label=as.character(number)), size=1.8)
p <- p + geom_text(aes(x=150, y=350, label="Ziffer:"), size=1.8)
p <- p + ylab("")
p <- p + xlab("Perzentile der ersten Ziffer der Hausnummer")
p <- p + geom_vline(xintercept=max(num$end)*.7, linetype=2)
p <- p + expand_limits(y=400)
p <- p + theme(text = element_text(size=6))
p
ggsave("Abb6.tiff", dpi=500, width=6, height=2, unit="cm")


############################
# Seconds on Question Page #
############################

### Combined Results
presult <- array(NA, dim=c(4,3,61),
                 dimnames=list(c("DQ","WoC","RRT", "D-ICT"),
                               c("lowerCI","Estimate","upperCI"),
                               c()))

data <- as.data.frame(data)
for (t in 1:61) {
    print(t)
 ## 1. Direct Question: RESAMPLING
 boots <- c()
 set.seed(834950)
 for (i in 1:1000) {
     dat <- sample(as.numeric(rownames(data[data$dDQ>=t,])), 865, TRUE)
     boots <- c(boots, table(data$DQ[dat]==11)[2]/table(data$DQ[dat]%in%c(-99, -98, 1, 2))[1])
 }
 est <- mean(boots)
 low <- quantile(boots, .025)
 up <- quantile(boots, .975)
 presult[1,,t] <- cbind(low*100,est*100,up*100)

 ### 2. Wisdom of Crowds: RESAMPLING
 boots <- c()
 set.seed(133707)
 for (i in 1:1000) {
     dat <- sample(as.numeric(rownames(data[data$dcrowd>=t,])), 865, TRUE)
     dat <- data$crowd[dat]
     dat <- ifelse(dat==-90, NA, dat)
     boots <- c(boots, mean(dat, na.rm=TRUE))
 }
 est <- mean(boots)
 low <- quantile(boots, .025)
 up <- quantile(boots, .975)
 presult[2,,t] <- cbind(low,est,up)

 ### 3. Crosswise RRT: RESAMPLING
 boots <- c()
 set.seed(133707)
 for (i in 1:1000) {
     dat <- sample(as.numeric(rownames(data[data$group==1&data$dRRT>=t,])), 865, TRUE)
     dat <- data[dat,"RRT"]
     dat <- dat[is.na(dat)==FALSE&dat>0]
     n <- length(dat) # number of responses
     r <- length(dat[dat==1]) # Number of "Yes" answers
     boots <- c(boots, ((r/n)+.7-1)/(2*.7-1))
 }
 est <- mean(boots)
 low <- quantile(boots, .025)
 up <- quantile(boots, .975)
 presult[3,,t] <- cbind(low*100,est*100,up*100)

 #my double ict
  A <- cbind(data$ICT1ctrl[data$dICT1ctrl>=t&data$dICT1trt>=t],
             data$ICT1trt[data$dICT1ctrl>=t&data$dICT1trt>=t])
  B <- cbind(data$ICT2ctrl[data$dICT2ctrl>=t&data$dICT2trt>=t],
             data$ICT2trt[data$dICT2ctrl>=t&data$dICT2trt>=t])
 for(i in 1:2){
   A[A[,i]<0,i] <- NA
   B[B[,i]<0,i] <- NA
 }
 A <- A[complete.cases(A),]
 B <- B[complete.cases(B),]

 est <- ((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2

 double.list <- function(dat, indices) {
     A <- dat[indices,1:2]
     B <- dat[indices,3:4]
     return(((mean(A[,2])-mean(A[,1]))+(mean(B[,2])-mean(B[,1])))/2)
 }
 mini <- min(nrow(A), nrow(B))
 booted <- boot(cbind(A[1:mini,],B[1:mini,]), double.list, R=1000)
 low <- quantile(booted$t, probs=.025)
 up <- quantile(booted$t, probs=.975)
 # results
 cbind(low,est,up)
 presult[4,,t] <- cbind(low*100,est*100,up*100)
}

########
# Plot #
########

pdata <- matrix(nrow=0, ncol=4)
for (i in 1:dim(presult)[3]) {
    pdata <- rbind(pdata, cbind(presult[,,i], i))
}
pdata <- cbind(pdata, rownames(pdata))
colnames(pdata) <- c("lowerCI", "Estimate", "upperCI", "t", "type")
pdata <- as.data.table(pdata)
pdata$Estimate <- as.numeric(pdata$Estimate)
pdata$t <- as.numeric(pdata$t)
pdata$lowerCI <- as.numeric(pdata$lowerCI)
pdata$upperCI <- as.numeric(pdata$upperCI)
pdata$y <- ifelse(pdata$type=="DQ", 4,
                   ifelse(pdata$type=="D-ICT", 3,
                          ifelse(pdata$type=="RRT", 2, 1)))
pdata$lab <- factor(pdata$y, labels=c("Weisheit\nder Vielen", "Kreuzweise\nRAT",
                                   "Doppeltes\nListenexperiment", "Sonntagsfrage"))

p <- ggplot(data=pdata[type!="ICT1"&type!="ICT2"], aes(x=t, group=type))
p <- p + geom_line(aes(y=Estimate, linetype=as.factor(lab)))
## p <- p + geom_ribbon(aes(ymin=lowerCI,
##                                 ymax=upperCI, fill=as.factor(type)))
p <- p + theme_bw()
p <- p + theme(legend.title=element_blank())
p <- p + theme(panel.grid.major.x = element_blank())
p <- p + theme(panel.grid.major.y = element_blank())
p <- p + theme(panel.grid.minor.x = element_blank())
p <- p + theme(panel.grid.minor.y = element_blank())
p <- p + geom_hline(aes(yintercept=12.6), linetype=2)
p <- p + xlab("Mindest-Beantwortungszeit (in Sekunden)")
p <- p + ylab("Anteil der AfD-Zweitstimmen (in %)")
p <- p + theme(legend.key.height=unit(1,"line"))
p <- p + theme(text = element_text(size=7))
p <- p + theme(legend.position=c(.2,.2))
p <- p + theme(legend.box.background = element_rect(colour = "black"))
p
ggsave("Abb7.tiff", dpi=500, width=8, height=8, units="cm")


